package com.leadingsoft.controller.parse;

import java.util.ArrayList;

import org.beetl.sql.core.SQLManager;
import org.cef.callback.CefStringVisitor;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.select.Elements;

import com.leadingsoft.common.model.HotelComment;
import com.leadingsoft.common.model.HotelCommentLog;
import com.leadingsoft.common.model.HotelLog;

/**
 * Example {@link CefStringVisitor} that parses a page's HTML source with Jsoup and
 * persists the scraped records through BeetlSQL.
 * <p>
 * Example site: Ctrip hotel comment pages. The field extraction inside
 * {@link #visit(String)} is left as a placeholder to be filled in with Jsoup
 * queries for the target site.
 * <p>
 * Note: {@code listSelector} is passed to {@code Document.getElementsByClass}, so it
 * is treated as a CSS class name, not as a general CSS query.
 *
 * @author gongym
 * @date 2018-06-09 22:32:26
 */
public class ParsingHtmlDocument_v2 implements CefStringVisitor {
	/** URL of the page whose source is being visited; used as the de-duplication key. */
	private final String url;
	/** CSS class name identifying the elements that each represent one comment. */
	private final String listSelector;
	/** BeetlSQL entry point used for the lookup and the inserts. */
	private final SQLManager sqlManager;

	/**
	 * @param url          URL of the page being parsed
	 * @param listSelector CSS class name of the per-comment elements
	 * @param sqlManager   BeetlSQL manager used to read and write the log and comments
	 */
	public ParsingHtmlDocument_v2(String url, String listSelector, SQLManager sqlManager) {
		this.url = url;
		this.listSelector = listSelector;
		this.sqlManager = sqlManager;
	}

	/**
	 * Parses the given page source and stores one {@link HotelComment} per matched
	 * element, then records the URL in {@link HotelCommentLog}. Does nothing when a
	 * {@link HotelCommentLog} row for this URL already exists.
	 *
	 * @param string HTML source of the page
	 */
	@Override
	public void visit(String string) {
		// Check whether this URL has already been parsed. The lookup must use the same
		// log entity that is written below (HotelCommentLog); querying a different log
		// table would never see the rows this class inserts, so pages would be
		// re-parsed and their comments inserted again on every visit.
		HotelCommentLog logTemplate = new HotelCommentLog();
		logTemplate.setSpideredUrl(url);
		if (null != sqlManager.templateOne(logTemplate)) {
			return;
		}

		// Only parse the HTML once we know the page still needs processing.
		Document htmlDocument = Jsoup.parse(string);
		Elements commentElements = htmlDocument.getElementsByClass(listSelector);
		java.util.List<HotelComment> hotelCommentList = new ArrayList<>(commentElements.size());
		commentElements.forEach((element) -> {
			HotelComment hotelComment = new HotelComment();
			// Placeholder: populate hotelComment from element using Jsoup queries.
			hotelCommentList.add(hotelComment);
		});
		sqlManager.insertBatch(HotelComment.class, hotelCommentList);

		// Record the URL so later visits of the same page are skipped.
		HotelCommentLog nowHotelCommentLog = new HotelCommentLog();
		nowHotelCommentLog.setSpideredUrl(url);
		sqlManager.insert(nowHotelCommentLog);
	}
}
